home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/perl
-
- # swish-web.cgi
- # runs a SWISH search from a Web page
-
# ---- User configuration ---------------------------------------------
# Everything in this section is site-specific; the rest of the script
# reads these package variables.

# Path of the SWISH executable and of the index searched by default.
$SwishLocation        = '/usr/local/bin/swish';
$DefaultIndexLocation = '/usr/home/shpank/public_html/cavalcade.swish';

# Indexes selectable through the form's "indexnumber" field
# (the form value 1 picks the first entry).
@MultiIndexLocation = (
    '/usr/home/shpank/public_html/cavalcade.swish',
    '/usr/home/shpank/public_html/cavalcade.swish',
);

# A result link has $BaseURL stripped and $BaseDirectory prepended to
# obtain the file that is checked on disk.
$BaseDirectory = '/usr/home/shpank/public_html/';
$BaseURL       = 'http://cavalcade-whimsey.com/';

# Display switches (1 = on, 0 = off).
$ShowFilePaths  = 1;
$PrintBoldLinks = 1;    # this applies to page titles
$HotlinkURLs    = 0;    # duplicate the hotlink on the URL line

# Keyword used when the index must be read without a real search.
$GoofyKeyword = 'oQiTb2lkCv';

$ReturnPageURL    = 'http://cavalcade-whimsey.com/index.shtml';
$ReturnPagePrompt = 'The "Cavalcade of Whimsey"';

# edit either the simple or the detailed user form:
$SwishFormURL    = 'http://cavalcade-whimsey.com/index.shtml';
$SwishFormPrompt = 'Search Again';

# Single-quoted, so the @ needs no backslash here; add one ("you\@your.org")
# if this is ever changed to a double-quoted string.
$MailtoAddress = 'shpank@beachin.net';
$MailtoPrompt  = 'E-mail: ';
$MailtoName    = 'Shpank';

# string constants
$Space        = ' ';
$Zero         = '0';
$HugeFileSize = '1000000000';
$HugeTime     = '1000000000';
-
-
- #---------------------------------------------------------------------
- # Print Page Top
- #---------------------------------------------------------------------
-
# PrintPageTop
# Prints the opening part of the results page to the currently selected
# output handle: document head, the "return" and "search again" links,
# the echoed search words and a contact note.
# Reads the globals $Keywords, $ReturnPageURL, $ReturnPagePrompt,
# $SwishFormURL, $SwishFormPrompt and $MailtoAddress.
# The contact link uses the configured $MailtoAddress instead of a
# second, hard-coded copy of the address.
sub PrintPageTop
{
    print <<ENDPRINT;

<html>
<head>
<title>Search Results for: $Keywords</title>
</head>
<body bgcolor="ffffff">

<center><table width=500>
<tr><td><font size=-1>
<a name="top" href="$ReturnPageURL">$ReturnPagePrompt</a><br>
<a href="$SwishFormURL">$SwishFormPrompt</a>
<p>
Search Results for:<font size=+1 color="#000088"> $Keywords </font>
<p>
Remember: If you have a file that is not found here, or you would like to contribute to this
database, please <a href="mailto:$MailtoAddress">mail</a> me. Thanks.
<hr width=85% noshade>

ENDPRINT
}
-
-
- #---------------------------------------------------------------------
- # Print Page End
- #---------------------------------------------------------------------
-
# PrintPageEnd
# Prints the closing part of the results page to the currently selected
# output handle: a "try again" search form, a contact link and the
# closing table/body/html tags.
# Reads the globals $MailtoAddress and $MailtoName, so the contact link
# follows the configuration instead of a hard-coded address and name.
# The form's ACTION URL is still literal text; edit it here when the
# script moves.
sub PrintPageEnd
{
    print <<ENDPRINT;

<hr width=85% noshade>
<p>
If you didn't find what you were looking for, please try again.
<p>
<FORM METHOD="POST"
ACTION="http://cavalcade-whimsey.com/shpank/swish-web.cgi">

<input type="text" name="keywords" size="40">
<input type="submit" value="Search">
<input type="reset" value="Clear">
<br>
<select name="indexnumber">
<option value="1" selected>Home
</select>
<select name="maxhits">
<option>10
<option selected>25
<option>50
<option>100
<option>all
</select>
Results
<p>
</center></form>
<p>
<center><a href="mailto:$MailtoAddress">$MailtoName</a>

</font>
</td></tr></table></center>

</body>
</html>

ENDPRINT
}
-
-
# SetupFileTypeDescriptions
# Fills the global hash %mimetype, which maps a file extension
# (including the leading dot) to the description shown next to a search
# result.  The key ".??" is the fallback for unknown extensions.
# A description of " " (one space) means "do not display a file type".
#
# ExtractLinkFromLine lower-cases the extension before the lookup, so a
# ".Z" key alone can never match; ".z" is therefore listed as well
# (".Z" is kept for any caller that looks it up directly).
sub SetupFileTypeDescriptions
{
    %mimetype = (
        ".html" => " ",
        ".htm"  => "html",
        ".txt"  => "plain text",
        ".pdf"  => "Adobe Portable Document Format",
        ".ps"   => "Postscript",
        ".eps"  => "Encapsulated Postscript",
        ".rtf"  => "Rich Text Format",
        ".man"  => "Unix manual page",
        ".gif"  => "GIF image",
        ".jpg"  => "JPEG image",
        ".jpeg" => "JPEG image",
        ".jpe"  => "JPEG image",
        ".pict" => "PICT image",
        ".xbm"  => "X bitmap image",
        ".png"  => "PNG image",
        ".au"   => "AU audio",
        ".snd"  => "Mac SND audio",
        ".mpg"  => "MPEG movie",
        ".avi"  => "MS video",
        ".mov"  => "QuickTime movie",
        ".qt"   => "QuickTime movie",
        ".Z"    => "Unix Z compressed",
        ".z"    => "Unix Z compressed",
        ".gz"   => "Unix gzip compressed",
        ".zip"  => "compressed",
        ".uu"   => "uuencoded",
        ".hqx"  => "Binhex",
        ".tar"  => "Unix archive",
        ".tex"  => "Tex or LaTex document",
        ".wav"  => "Windows WAV audio",
        ".c"    => "C source",
        ".pl"   => "Perl source",
        ".py"   => "Python source",
        ".tcl"  => "TCL source",
        ".??"   => "unknown file type",
    );
}
-
-
- #=====================================================================
- # END OF USER CONFIGURATION SECTION
- #=====================================================================
-
-
- #---------------------------------------------------------------------
- # PROCESS DATA FROM FORM
- #---------------------------------------------------------------------
-
# Decode the request into the global hash %in.
LocalReadParse();

# Search words: allow only word characters, whitespace, asterisks and
# parentheses.  Everything else (including a leading "-", which SWISH
# could take for an option) is removed.
$Keywords = defined ($in{'keywords'}) ? $in{'keywords'} : "";
$Keywords =~ s/[^\w()*\s]//g;

# Maximum number of hits: remove anything that is not a digit or one of
# the letters of "all".
$MaxHits = defined ($in{'maxhits'}) ? $in{'maxhits'} : "";
$MaxHits =~ s/[^\dal]//g;

# 1-based position in @MultiIndexLocation: digits only.
$IndexNumber = defined ($in{'indexnumber'}) ? $in{'indexnumber'} : "";
$IndexNumber =~ s/[^\d]//g;

$IndexData = $in{'indexdata'};
$SearchAll = $in{'searchall'};

$Tags_Head       = $in{'head'};
$Tags_Title      = $in{'title'};
$Tags_Body       = $in{'body'};
$Tags_Headings   = $in{'headings'};
$Tags_Emphasized = $in{'emphasized'};
$Tags_Comments   = $in{'comments'};

# Build the argument for the SWISH -t option, one letter per ticked box.
$Tags  = "";
$Tags .= "H" if ($Tags_Head);
$Tags .= "t" if ($Tags_Title);
$Tags .= "B" if ($Tags_Body);
$Tags .= "h" if ($Tags_Headings);
$Tags .= "e" if ($Tags_Emphasized);
$Tags .= "c" if ($Tags_Comments);

# If the "Search entire files" box is checked, search everything.  (A
# simple form that sends none of the tag fields already leaves $Tags
# empty, so no separate test is needed for that case.)
$Tags = "" if ($SearchAll);

# Choose the index.  A missing, zero or out-of-range index number falls
# back to the default index; previously it produced an undefined path,
# and "00" selected element -1, i.e. the last index.
$IndexLocation = $DefaultIndexLocation;
if ($IndexNumber ne ""
    && $IndexNumber >= 1
    && $IndexNumber <= scalar (@MultiIndexLocation))
{
    $IndexLocation = $MultiIndexLocation[$IndexNumber - 1];
}

SetupFileTypeDescriptions();
-
- #---------------------------------------------------------------------
- # PRINT HTML PAGE FROM SEARCH RESULT
- #---------------------------------------------------------------------
-
# Unbuffered output, so nothing printed before the fork below can still
# be sitting in a buffer that the child process would inherit.
$| = 1;

print "Content-type: text/html\n\n";
PrintPageTop();

# Refuse to continue when the SWISH program is missing or not runnable.
my $SwishProblem = "";
if (!-e $SwishLocation)
{
    $SwishProblem = "SWISH not found";
}
elsif (!-x $SwishLocation)
{
    $SwishProblem = "SWISH not executable by Web server";
}

if ($SwishProblem)
{
    print "<b>$SwishProblem</b>";
    if ($ShowFilePaths)
    {
        print " - $SwishLocation";
    }
    print "\n";
    PrintPageEnd();
    exit;
}

# One argument list for both the plain and the tag-restricted search.
# split(' ') splits on runs of whitespace, so doubled spaces or tabs in
# the keywords no longer hand empty words to SWISH.
my @SwishArgs = ("-w", split (' ', $Keywords),
                 "-f", $IndexLocation,
                 "-m", $MaxHits);
if ($Tags)
{
    push (@SwishArgs, "-t", $Tags);
}

# Forking open: undef = fork failed, 0 = we are the child (its STDOUT is
# the pipe), otherwise we are the parent and SWISHOUT reads the child.
# "open(...) || exec ..." treated a failed fork like the child and so
# replaced the CGI process itself with SWISH.
my $SwishPid = open (SWISHOUT, "-|");
if (!defined ($SwishPid))
{
    print "<b>Unable to start SWISH</b>\n";
}
elsif ($SwishPid == 0)
{
    # Child: exec as a list, so no shell is involved.  If exec fails,
    # report it in the "err:" form the parent already displays and stop
    # here instead of running the rest of this script into the pipe.
    exec ($SwishLocation, @SwishArgs) || print "err: unable to run SWISH\n";
    exit (1);
}
else
{
    RunSearch();
}

PrintPageEnd();
exit;
-
-
- #---------------------------------------------------------------------
- # RUN SEARCH
- #---------------------------------------------------------------------
-
# RunSearch
# Reads the SWISH output from the SWISHOUT handle line by line, files
# every recognised result line into the result arrays, then prints the
# results whose file still exists and, if requested, the index data.
#
# Each line is offered to the scanners in turn (index data, comments,
# error message, usage report); only a line none of them recognises is
# parsed as a result line.
# Uses the globals $LINE, $ArrayIndex and $LineNumber, which the helper
# routines read and write, and $IndexData (set from the form).
sub RunSearch
{
    $ArrayIndex = 0;
    $LineNumber = 1;

    while (defined ($LINE = <SWISHOUT>))
    {
        # chomp removes only a trailing newline; chop would also eat the
        # last real character of a final line that has no newline.
        chomp ($LINE);

        # the line may contain something other than a link
        $LineRecognized = ScanLineForIndexData ()
                       || ScanLineForComments ()
                       || ScanLineForErrorMessage ()
                       || ScanLineForUsageReport ();
        next if ($LineRecognized);

        # then otherwise the line contains a link
        ExtractLinkFromLine ();
        if ($score)
        {
            AddLink ();
            $ArrayIndex++;
        }
    }
    close (SWISHOUT);

    print "<blockquote>\n";
    print "<dl>\n";

    # $ArrayIndex stays a global C-style counter because CheckLink and
    # PrintLink read it to find the current entry.
    $EndArrayIndex = @LinkArray;
    for ($ArrayIndex = 0; $ArrayIndex < $EndArrayIndex; $ArrayIndex++)
    {
        $LiveFileExists = CheckLink ();
        if ($LiveFileExists)
        {
            PrintLink ();
        }
    }

    print "</dl>\n";
    print "</blockquote>\n";

    # if "show data about index" box is checked on HTML form
    if ($IndexData)
    {
        PrintIndexData ();
    }
}
-
-
- #--------------------------------------------------------------
- # SCAN LINE FOR INDEX DATA
- #--------------------------------------------------------------
-
# ScanLineForIndexData
# Checks whether the global $LINE is one of the index header lines that
# SWISH prints ("# SWISH format ...", "# Name:", "# Saved as:",
# "# Indexed on:", "# Maintained by:", "# Description:", "# Counts:").
# For a match, the text after the separator is stored in the matching
# global ($iformat, $iname, $ifilename, $idate, $imaintby, $idesc or
# $icounts); the text keeps whatever whitespace follows the separator.
# Returns 1 when the line was recognised, 0 otherwise.
sub ScanLineForIndexData
{
    $ReturnValue = 1;

    if ($LINE =~ /^# SWISH/)
    {
        ($junk, $iformat) = split (/format/, $LINE, 2);
    }
    elsif ($LINE =~ /^# Name:/)
    {
        ($junk, $iname) = split (/:/, $LINE, 2);
    }
    elsif ($LINE =~ /^# Saved as:/)
    {
        ($junk, $ifilename) = split (/:/, $LINE, 2);
    }
    elsif ($LINE =~ /^# Indexed on:/)
    {
        ($junk, $idate) = split (/:/, $LINE, 2);
    }
    elsif ($LINE =~ /^# Maintained by:/)
    {
        ($junk, $imaintby) = split (/:/, $LINE, 2);
    }
    elsif ($LINE =~ /^# Description:/)
    {
        ($junk, $idesc) = split (/:/, $LINE, 2);
    }
    elsif ($LINE =~ /^# Counts:/)
    {
        ($junk, $icounts) = split (/:/, $LINE, 2);
    }
    else
    {
        # not an index header line
        $ReturnValue = 0;
    }

    return ($ReturnValue);
}
-
-
- #--------------------------------------------------------------
- # SCAN LINE FOR COMMENTS
- #--------------------------------------------------------------
-
# ScanLineForComments
# Tells whether the global $LINE is one of the SWISH output lines that
# carry no result: a line starting with "#", with "search" or with ".".
# Returns 1 for such a line and 0 for anything else.
sub ScanLineForComments
{
    # one alternation instead of three nested tests
    $ReturnValue = ($LINE =~ /^(?:#|search|\.)/) ? 1 : 0;
    return ($ReturnValue);
}
-
-
- #--------------------------------------------------------------
- # SCAN LINE FOR ERROR MESSAGE
- #--------------------------------------------------------------
-
# ScanLineForErrorMessage
# If the global $LINE is a SWISH error line (it starts with "err:"),
# prints the text after the first colon in bold and returns 1;
# otherwise prints nothing and returns 0.
# Sets the globals $err and $message to the two parts of the line.
# The message comes from an external program, so &, < and > are
# escaped before it is placed in the HTML page.
sub ScanLineForErrorMessage
{
    $ReturnValue = 0;

    if ($LINE =~ /^err:/)
    {
        ($err, $message) = split (/:/, $LINE, 2);

        my $SafeMessage = $message;
        $SafeMessage =~ s/&/&amp;/g;    # must come first
        $SafeMessage =~ s/</&lt;/g;
        $SafeMessage =~ s/>/&gt;/g;

        print "<dt><dd>";
        print "<b>$SafeMessage</b><br>";
        print "</dd>\n";
        $ReturnValue = 1;
    }

    return ($ReturnValue);
}
-
-
- #--------------------------------------------------------------
- # SCAN LINE FOR USAGE REPORT
- #--------------------------------------------------------------
-
# UsageReportEscape
# Private helper of ScanLineForUsageReport: returns its argument with
# &, < and > replaced by HTML entities.
sub UsageReportEscape
{
    my ($Text) = @_;
    $Text =~ s/&/&amp;/g;    # must come first
    $Text =~ s/</&lt;/g;
    $Text =~ s/>/&gt;/g;
    return ($Text);
}

# ScanLineForUsageReport
# SWISH prints its usage text when it is called incorrectly.  If the
# global $LINE contains " usage:", this prints the command that was
# run followed by that line and all remaining SWISHOUT output inside
# <pre>, and returns 1.  Otherwise it prints nothing and returns 0.
# Reading the rest of SWISHOUT leaves $LINE undefined and the handle at
# end of file.
#
# Changes from the earlier version:
#  - the first usage line has already lost its newline and the later
#    ones have not, so the newline is now added to the first line only
#    (before, the first two lines ran together and the rest were
#    double-spaced);
#  - "-t" is shown only when $Tags is non-empty;
#  - the SWISH text is HTML-escaped.
sub ScanLineForUsageReport
{
    $ReturnValue = 0;

    if ($LINE =~ / usage:/)
    {
        print "<dt><dd>";
        print "<p><b>An error occured when calling Swish:</b><br>";
        print "<p>$SwishLocation -w $Keywords -f $IndexLocation -m $MaxHits";
        if ($Tags)
        {
            print " -t $Tags";
        }
        print "<br>";
        print "<p>Swish returned:";
        print "<p><pre>";
        print UsageReportEscape ($LINE), "\n";
        while (defined ($LINE = <SWISHOUT>))
        {
            # these lines still end in their own newline
            print UsageReportEscape ($LINE);
        }
        print "</pre>";
        print "</dd>";

        $ReturnValue = 1;
    }

    return ($ReturnValue);
}
-
-
- #--------------------------------------------------------------
- # EXTRACT LINK FROM LINE
- #--------------------------------------------------------------
-
# ExtractLinkFromLine
# Parses the global $LINE as a SWISH result line of the form
#     score  link  "title"  bytes
# and stores the four parts in the globals $score, $link, $title and
# $bytes (all undefined when the line does not have that form).
# Then derives $ext, the lower-cased file extension of the link, and
# $filetype, its description from %mimetype.
#
# $ext is reset to ".??" on every call.  Before, a link without an
# extension silently kept the extension (and so the file type) of the
# previously parsed line.
sub ExtractLinkFromLine
{
    # extract the four elements of a good search result line
    # (Thanks to Ian Phillips & Joshua Sean Bell)
    ($score, $link, $title, $bytes) =
        ($LINE =~ m/^(\d+)\s+(\S+)\s+"([^\"]+)"\s+(\d+)\s*$/);

    # get the mime type for the file extension
    $ext = ".??";
    if (defined ($link) && $link =~ /(\.\w+)$/)
    {
        $ext = "\L$1\E";
    }
    $filetype = $mimetype{$ext} || $mimetype{".??"};
}
-
-
- #---------------------------------------------------------------------
- # ADD LINK TO ARRAY
- #---------------------------------------------------------------------
-
# AddLink
# Copies the fields of the result parsed last ($link, $title, $score,
# $bytes, $ext, $filetype) into slot $ArrayIndex of the parallel result
# arrays.  The modification date and time slots are set to empty
# strings here.
sub AddLink
{
    my $Slot = $ArrayIndex;

    ($LinkArray[$Slot], $TitleArray[$Slot]) = ($link, $title);
    ($ScoreArray[$Slot], $BytesArray[$Slot]) = ($score, $bytes);

    # placeholders for the file date and time
    ($FileModDateArray[$Slot], $FileModTimeArray[$Slot]) = ("", "");

    $ExtArray[$Slot]      = $ext;
    $FileTypeArray[$Slot] = $filetype;
}
-
-
- #---------------------------------------------------------------------
- # PRINT LINK
- #---------------------------------------------------------------------
-
# PrintLink
# Prints result number $LineNumber from slot $ArrayIndex of the result
# arrays: the linked title, the URL (as a second link when $HotlinkURLs
# is set), then score, size, modification date/time and file type.
# Increments the global $LineNumber afterwards.
#
# The file type is skipped when its description is " " (one space).
# That test must be the string comparison "ne": the numeric "!=" that
# was used before treated every description as 0, so it was never true
# and no file type was ever printed.
sub PrintLink
{
    print "<p>\n";
    print "<dt><b>$LineNumber. <a href=\"$LinkArray[$ArrayIndex]\">$TitleArray[$ArrayIndex]</a></b>\n";
    if ($HotlinkURLs)
    {
        print "<dd><a href=\"$LinkArray[$ArrayIndex]\">$LinkArray[$ArrayIndex]</a>\n";
    }
    else
    {
        print "<dd>$LinkArray[$ArrayIndex]\n";
    }
    print "<dd><b>$ScoreArray[$ArrayIndex]</b> score, ";
    print "$BytesArray[$ArrayIndex] bytes, ";
    print "$FileModDateArray[$ArrayIndex] $FileModTimeArray[$ArrayIndex]";
    if ($FileTypeArray[$ArrayIndex] ne " ")
    {
        print " - $FileTypeArray[$ArrayIndex]";
    }
    print "\n";
    $LineNumber++;
}
-
-
# CheckLink
# Maps the link in slot $ArrayIndex of @LinkArray to a local file by
# removing a leading $BaseURL and prepending $BaseDirectory (the result
# is left in the global $FileSpec).  If that file exists, its size and
# modification date/time are fetched with GetFileStats and stored in
# the result arrays.
# Returns 1 when the file exists, 0 otherwise.
#
# $BaseURL is now matched literally (\Q...\E) and only at the start of
# the link.  Before, it was used as a regular expression, so its "."
# matched any character and it could match anywhere in the link.
sub CheckLink
{
    $LiveFileFound = 0;

    $FileName = $LinkArray[$ArrayIndex];
    $FileName =~ s#^\Q$BaseURL\E##;
    $FileSpec = $BaseDirectory.$FileName;

    if (-e $FileSpec)
    {
        $LiveFileFound = 1;
        GetFileStats ();
        $BytesArray[$ArrayIndex] = $ShortFileSize;
        $FileModDateArray[$ArrayIndex] = $FileModDate;
        $FileModTimeArray[$ArrayIndex] = $FileModTime;
    }
    return ($LiveFileFound);
}
-
-
- #----------------------------------------------------------------
- # Get File Stats
- #----------------------------------------------------------------
-
# GetFileStats
# Runs stat on the file named by the global $FileSpec and publishes the
# result in globals:
#   $ShortFileSize  size in bytes, unpadded
#   $FileModDate    modification date, e.g. "Jun 15 '05"
#   $FileModTime    modification time as H:MM (local time)
# plus the raw stat fields ($Device .. $ModifiedTime) and the
# localtime fields ($Seconds .. $DSTFlag).
#
# Changes from the earlier version:
#  - localtime returns the year as "years since 1900", so dates from
#    2000 on were printed as '100, '101, ...; the year is now reduced
#    to two digits;
#  - $ShortFileSize is set on every call; before it kept the previous
#    file's size when the file was $HugeFileSize bytes or larger;
#  - the unused stat fields are dropped instead of being assigned to a
#    hash with an odd number of elements.
sub GetFileStats
{
    ($Device, $Inode, $FilePerms, $NumHardLinks, $Uid, $Gid, $DeviceType,
     $FileSize, $AccessedTime, $ModifiedTime) = stat ($FileSpec);

    $ShortFileSize = $FileSize;

    # $FileSize and $ModifiedTime are left-padded with zeros to the
    # width of the "huge" constants, as before.
    # NOTE(review): presumably so they compare correctly as strings;
    # nothing visible in this file reads the padded values -- confirm.
    if ($FileSize < $HugeFileSize)
    {
        $FileSize = ($Zero x (length ($HugeFileSize) - length ($FileSize)))
                  . $FileSize;
    }
    if ($ModifiedTime < $HugeTime)
    {
        $ModifiedTime = ($Zero x (length ($HugeTime) - length ($ModifiedTime)))
                      . $ModifiedTime;
    }

    ($Seconds, $Minutes, $HourNum, $MonthDay, $MonthNum,
     $Year, $WeekdayNum, $YeardayNum, $DSTFlag) = localtime ($ModifiedTime);

    # localtime counts months from 0
    @MonthWord{0 .. 11} =
        ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec");

    $FileModDate = sprintf ("%s %s '%02d",
                            $MonthWord{$MonthNum}, $MonthDay, $Year % 100);
    $FileModTime = sprintf ("%s:%02d", $HourNum, $Minutes);
}
-
-
- #---------------------------------------------------------------------
- # PRINT INDEX DATA
- #---------------------------------------------------------------------
-
# PrintIndexData
# Prints the description of the searched index (name, description,
# counts, format, maintainer, date) from the globals that
# ScanLineForIndexData filled in, followed by the SWISH version
# obtained by running "$SwishLocation -V".
# When $Keywords is empty, SearchFileForIndexData is called first to
# collect the data.  File paths are shown only when $ShowFilePaths is
# set.
#
# The version query uses a forking open whose result is checked
# explicitly: undef = fork failed, 0 = child, otherwise parent.  With
# "open(...) || exec ...", a failed fork made this process exec SWISH,
# and a failed exec let the child continue running this script with
# its output going into the pipe.
sub PrintIndexData
{
    if (!$Keywords)
    {
        SearchFileForIndexData ();
    }
    print "<hr>";
    print "<blockquote>\n";
    print "Index name: <b>$iname</b><br>\n";
    print "Description: <b>$idesc</b><br>\n";
    print "Index contains: <b>$icounts</b><br>\n";
    if ($ShowFilePaths)
    {
        print "Location: <b>$IndexLocation</b><br>\n";
        print "Saved as (internal name): <b>$ifilename</b><br>\n";
    }
    print "SWISH Format: <b>$iformat</b><br>\n";
    print "Maintained by: <b>$imaintby</b><br>\n";
    print "Indexed on: (day/month/year): <b>$idate</b><br>\n";

    my $Pid = open (SWISHOUT, "-|");
    if (defined ($Pid))
    {
        if ($Pid == 0)
        {
            # child: become SWISH, or stop if that is not possible
            exec ($SwishLocation, "-V");
            exit (1);
        }

        # parent: the first line of output is the version string
        $SwishVersion = <SWISHOUT>;
        close (SWISHOUT);
        if (defined ($SwishVersion))
        {
            print "Searched with: <b>$SwishVersion</b><br>\n";
        }
    }
    print "</blockquote>\n";
}
-
-
- #---------------------------------------------------------------------
- # SEARCH FILE FOR INDEX DATA
- #---------------------------------------------------------------------
-
- # If the form's input field is blank, ordinarily no search is made,
- # which prevents reading the index file for the index data. In that
- # case, the following subroutine is called.
-
# SearchFileForIndexData
# Runs SWISH on $IndexLocation with a keyword that should not occur in
# any document, only to read the index header lines it prints; every
# output line is passed to ScanLineForIndexData.
# Side effect: overwrites the global $Keywords with $GoofyKeyword.
#
# The result of the forking open is checked explicitly (undef = fork
# failed, 0 = child, otherwise parent), so a failed fork no longer makes
# this process exec SWISH and a failed exec no longer lets the child run
# on.  chomp replaces chop so that a final line without a newline keeps
# its last character.
sub SearchFileForIndexData
{
    # use a keyword that definitely won't be found
    $Keywords = $GoofyKeyword;

    my $Pid = open (SWISHOUT, "-|");
    return if (!defined ($Pid));    # could not fork: no index data

    if ($Pid == 0)
    {
        # child: become SWISH, or stop if that is not possible
        exec ($SwishLocation,
              "-f", $IndexLocation,
              "-w", $Keywords);
        exit (1);
    }

    while (defined ($LINE = <SWISHOUT>))
    {
        chomp ($LINE);
        ScanLineForIndexData ();
    }
    close (SWISHOUT);
}
-
-
- #=====================================================================
- # GENERAL PURPOSE (BOILERPLATE) ROUTINES
- #=====================================================================
-
# LocalReadParse
# from Steven Brenner's cgi-lib.pl v1.14
#
# Reads the CGI request (QUERY_STRING for GET, CONTENT_LENGTH bytes of
# STDIN for POST), splits it on "&" or ";" and stores the decoded
# key/value pairs in %in.  A key that occurs more than once gets its
# values joined with "\0".  When a typeglob is passed as argument, the
# variables of that name are filled instead of $in/@in/%in.
# Returns the number of pairs found.
#
# Changes from the original:
#  - only "%" followed by two hex digits is decoded; "%(..)" turned any
#    two characters into a byte, so "%zz" became "\0" -- the same byte
#    that separates multiple values;
#  - bytes are built with the unsigned pack format "C";
#  - a field sent without "=" is stored as an empty string instead of
#    an undefined value.
sub LocalReadParse
{
    local (*in) = @_ if @_;
    local ($i, $key, $val);

    # Read in text
    if (LocalMethGet ())
    {
        $in = $ENV{'QUERY_STRING'};
    }
    elsif (LocalMethPost ())
    {
        read (STDIN, $in, $ENV{'CONTENT_LENGTH'});
    }

    @in = split (/[&;]/, $in);

    foreach $i (0 .. $#in)
    {
        # Convert plusses to spaces
        $in[$i] =~ s/\+/ /g;

        # Split into key and value
        # splits on the first =
        ($key, $val) = split (/=/, $in[$i], 2);
        $key = "" unless defined ($key);
        $val = "" unless defined ($val);

        # Convert %XX from hex numbers to alphanumeric
        $key =~ s/%([0-9a-fA-F]{2})/pack("C",hex($1))/ge;
        $val =~ s/%([0-9a-fA-F]{2})/pack("C",hex($1))/ge;

        # Associate key and value
        # \0 is the multiple separator
        $in{$key} .= "\0" if (defined ($in{$key}));
        $in{$key} .= $val;
    }
    return scalar (@in);
}
-
# LocalMethGet
# from Steven Brenner's cgi-lib.pl v1.14
# Reports whether the current CGI call is a GET request: the result is
# true when the REQUEST_METHOD environment variable equals "GET",
# false otherwise.
sub LocalMethGet
{
    my $Method = $ENV{'REQUEST_METHOD'};
    return ($Method eq "GET");
}
-
# LocalMethPost
# from Steven Brenner's cgi-lib.pl v1.14
# Reports whether the current CGI call is a POST request: the result is
# true when the REQUEST_METHOD environment variable equals "POST",
# false otherwise.
sub LocalMethPost
{
    my $Method = $ENV{'REQUEST_METHOD'};
    return ($Method eq "POST");
}
-